from pyspark.context import SparkContext


# Count how many students share each value of the last comma-separated field
# of students.txt (presumably a class/grade column — TODO confirm data schema).
sc = SparkContext(master="local", appName="word_count")

try:
    students_rdd = sc.textFile('../../data/students.txt')

    # Key each line by its last CSV field, with a count of 1 per occurrence.
    kv_rdd = students_rdd.map(lambda x: (x.split(',')[-1], 1))

    # Approach 1: reduceByKey — combines partial counts locally before the
    # shuffle (preferred over groupByKey for aggregation).
    # NOTE: collect() brings the (small) result set to the driver so the
    # prints are visible there; the original foreach(print) ran print on the
    # executors and only appeared on the console because master="local".
    for pair in kv_rdd.reduceByKey(lambda x, y: x + y).collect():
        print(pair)

    # Approach 2: groupByKey + sum — same result, but shuffles every value
    # across the network before summing (kept for comparison/demonstration).
    group_by_key = kv_rdd.groupByKey()
    for pair in group_by_key.map(lambda kv: (kv[0], sum(kv[1]))).collect():
        print(pair)
finally:
    # Always release the SparkContext, even if the job fails — the original
    # script never called stop(), leaking the driver/JVM resources.
    sc.stop()